/*******************************************************************************
Description

This file calls and coordinates several other do-files to create the dataset
"processed_datasets/main_dataset_meat"
*******************************************************************************/

* Run under Stata 16 semantics
version 16

*-------------------------------------------------------------------------------
* Establish the working directory (path is read from the global macro workdirectory)
cd "${workdirectory}"

* Settings *********************************************************************
* Close any open log (ignoring the error when none is open), wipe memory,
* and disable the -more- pause in the Results window
cap log close
clear all
set more off

********************************************************************************
**# Load the dataset containing total population and urbanization rate
/*******************************************************************************
* Source: WUP 2018
* Notes: Data accessed 11.13.2020
*******************************************************************************/

* Read the "urbanpop" sheet; the first spreadsheet row supplies the variable names
import excel using "raw_datasets/urbanpop.xlsx", sheet("urbanpop") firstrow clear

* Reshape from wide (urbrate<year>, pop<year>) to long: one row per country_wup-year
reshape long urbrate pop, i(country_wup) j(year)
label variable urbrate "Urbanization rate (%) in t"
label variable pop "Population (000s) in t"

* Overwrite the Sudan series (see readme for details)
* The three lists below are parallel: the k-th year goes with the k-th
* urbanization rate and the k-th population figure. Values are stored as text
* in the macros, so each literal reaches -replace- exactly as written here.
local sdn_years 1950 1955 1960 1965 1970 1975 1980 1985 1990 1995 2000 2005 2010 2015 2020

local sdn_urbrate 7.440641671 8.647428578 10.19919799 12.1403479 14.52181423 ///
	16.40269415 17.24939498 19.88963399 25.3251662 29.36590485 ///
	29.53349817 29.70415021 29.80201095 30.64552269 32.19156798

local sdn_pop 8216 9169 10387 11903 13776 ///
	16086 19010 22431 25640 29213 ///
	33474 38485 44053 49619 55043

local n_years : word count `sdn_years'
forvalues k = 1/`n_years' {
	local yr : word `k' of `sdn_years'
	local ur : word `k' of `sdn_urbrate'
	local pp : word `k' of `sdn_pop'
	replace urbrate = `ur' if country_wup == "Sudan" & year == `yr'
	replace pop = `pp' if country_wup == "Sudan" & year == `yr'
}

* Store the result as the core dataset
save "processed_datasets/main_dataset_meat", replace

********************************************************************************
**# Load country codes, names, continents and regions
/*******************************************************************************
- Source: country names from WUP 2018, codes and regions from WB WDI Metadata
- Notes: The matching between the WB and WUP datasets was done in the Excel file.
	     Data accessed 11.19.2020
*******************************************************************************/

* Read the "ccode" sheet; the first spreadsheet row supplies the variable names
import excel using "raw_datasets/ccode.xlsx", sheet("ccode") firstrow clear

* Create one indicator variable per category of region_wb and of continent,
* then store the lookup table
tabulate region_wb, generate(region_)
tabulate continent, generate(continent_)
save "processed_datasets/ccode", replace

* Attach the lookup to the core dataset by WUP country name, retaining only
* observations found in both files
use "processed_datasets/main_dataset_meat", clear
merge m:1 country_wup using "processed_datasets/ccode", keep(match) nogenerate
save "processed_datasets/main_dataset_meat", replace

********************************************************************************
**# Add Population in the largest city as % of urban population variable
********************************************************************************

* Run the secondary do-file that prepares the largest-city data
do "codes/secondary_dos/urban_prim_data_creator.do"

* Attach to the core dataset by country code and year; observations that exist
* only in the largest-city file are not kept
use "processed_datasets/main_dataset_meat", clear
merge 1:1 code_wb year using "processed_datasets/dataset_poplargestcity", keep(master match) nogenerate

* Rename, label and store
rename pop_largest_city primacy_rate
label variable primacy_rate "Share of the largest city in the urban population (%) in t"

save "processed_datasets/main_dataset_meat", replace

/*******************************************************************************
**# Add Total natural resources rents as share of GDP
*******************************************************************************/
	
* Run the secondary do-file that prepares the natural-resource-rents data
do "codes/secondary_dos/natres_data_creator.do"

* Attach to the core dataset by country code and year; observations that exist
* only in the rents file are not kept
use "processed_datasets/main_dataset_meat", clear
merge 1:1 code_wb year using "processed_datasets/dataset_rrents", keep(master match) nogenerate

* Label and store
label variable rrents "Share of natural resource rents in GDP (%) in t"
save "processed_datasets/main_dataset_meat", replace

/*******************************************************************************
**# Add Agricultural Exports data
*******************************************************************************/

* Run the secondary do-file that generates the agricultural-exports data.
* The filter below operates on whatever that do-file leaves in memory.
do "codes/secondary_dos/agriexp_data_creator.do"

* Keep only the five-year steps from 1960 to 2020
keep if inlist(year, 1960, 1965, 1970, 1975, 1980, 1985, 1990, 1995, 2000, 2005, 2010, 2015, 2020)

* Park the filtered data in a temporary file so it can be merged
tempfile agriexp_data_temp
save `agriexp_data_temp', replace

* Attach to the core dataset by country code and year.
* keep(master match) discards observations that exist only in the exports
* data, consistent with the largest-city and resource-rents merges; without it
* the core dataset gains rows that have no country name, population or
* urbanization data.
* NOTE(review): confirm no downstream step relies on those using-only rows.
use "processed_datasets/main_dataset_meat", clear
merge 1:1 code_wb year using `agriexp_data_temp', keep(master match) nogenerate
drop merchfao_x

* Agricultural exports as a share of merchandise exports
la var agri_sh_x "Share of agricultural exports in merchandise exports (%) in t"
la var agri_sh_x_ma1 "Share of agricultural exports in merchandise exports (%) in t (MA1)"
la var agri_sh_x_ma2 "Share of agricultural exports in merchandise exports (%) in t (MA2)"

* Cashcrop+meat+hides exports as a share of agricultural exports
rename cash_sh_agri cash_sh_agx
la var cash_sh_agx " Share of cashcrop+meat+hides exports in agricultural exports (%) in t"
rename cash_sh_agri_ma1 cash_sh_agx_ma1
la var cash_sh_agx_ma1 " Share of cashcrop+meat+hides exports in agricultural exports (%) in t (MA1)"
rename cash_sh_agri_ma2 cash_sh_agx_ma2
la var cash_sh_agx_ma2 " Share of cashcrop+meat+hides exports in agricultural exports (%) in t (MA2)"

* Cashcrop+meat+hides exports as a share of merchandise exports
la var cash_sh_x " Share of cashcrop+meat+hides exports in merchandise exports (%) in t"
la var cash_sh_x_ma1 " Share of cashcrop+meat+hides exports in merchandise exports (%) in t (MA1)"
la var cash_sh_x_ma2 " Share of cashcrop+meat+hides exports in merchandise exports (%) in t (MA2)"

save "processed_datasets/main_dataset_meat", replace

/*******************************************************************************
Add Merchandise Export % in GDP Data
*******************************************************************************/

* Run the secondary do-file that prepares the merchandise-exports-to-GDP data
do "codes/secondary_dos/merchgdp_data_creator.do"

* Attach to the core dataset by country code and year.
* keep(master match) discards observations that exist only in the using file,
* consistent with the largest-city and resource-rents merges; without it the
* core dataset gains rows that have no country name, population or
* urbanization data.
* NOTE(review): confirm no downstream step relies on those using-only rows.
use "processed_datasets/main_dataset_meat", clear
merge 1:1 code_wb year using "processed_datasets/dataset_merchgdp", keep(master match) nogenerate

* Rename to the final variable names and label
rename merch_sh_gdp_c merch_x_gdp
la var merch_x_gdp "Share of merchandise exports in GDP (%) in t"
rename merch_sh_gdp_c_ma1 merch_x_gdp_ma1
la var merch_x_gdp_ma1 "Share of merchandise exports in GDP (%) in t (MA1)"
rename merch_sh_gdp_c_ma2 merch_x_gdp_ma2
la var merch_x_gdp_ma2 "Share of merchandise exports in GDP (%) in t (MA2)"

save "processed_datasets/main_dataset_meat", replace

/*******************************************************************************
Add Metals and Fuels Export data
*******************************************************************************/

* Run the secondary do-file that prepares the metals and fuels export data
do "codes/secondary_dos/metals_data_creator.do"

* Attach to the core dataset by country code and year.
* keep(master match) discards observations that exist only in the using file,
* consistent with the largest-city and resource-rents merges; without it the
* core dataset gains rows that have no country name, population or
* urbanization data.
* NOTE(review): confirm no downstream step relies on those using-only rows.
use "processed_datasets/main_dataset_meat", clear
merge 1:1 code_wb year using "processed_datasets/dataset_metfuel", keep(master match) nogenerate

* Discard the variants that are not used; min_sh_x must go before the _c
* version can take over its name below
drop min_sh_x_n min_sh_x_n_ma1 min_sh_x_n_ma2 min_sh_x

* Promote the _c variants to the final variable names and label
rename min_sh_x_c min_sh_x
la var min_sh_x "Share of mining exports in merchandise exports (%) in t"
rename min_sh_x_c_ma1 min_sh_x_ma1
la var min_sh_x_ma1 "Share of mining exports in merchandise exports (%) in t (MA1)"
rename min_sh_x_c_ma2  min_sh_x_ma2
la var min_sh_x_ma2 "Share of mining exports in merchandise exports (%) in t (MA2)"

save "processed_datasets/main_dataset_meat", replace

/*******************************************************************************
Add Manufactures Export data
*******************************************************************************/

* Run the secondary do-file that prepares the manufactures export data
do "codes/secondary_dos/manufactures_data_creator.do"

* Attach to the core dataset by country code and year.
* keep(master match) discards observations that exist only in the using file,
* consistent with the largest-city and resource-rents merges; without it the
* core dataset gains rows that have no country name, population or
* urbanization data.
* NOTE(review): confirm no downstream step relies on those using-only rows.
use "processed_datasets/main_dataset_meat", clear
merge 1:1 code_wb year using "processed_datasets/dataset_mfg", keep(master match) nogenerate

* Discard the variants that are not used; mfg_sh_x must go before the _c
* version can take over its name below
drop mfg_x mfg_sh_x mfg_x_ma1 mfg_x_ma2

* Promote the _c variants to the final variable names and label
rename mfg_sh_x_c mfg_sh_x
la var mfg_sh_x "Share of manufacturing exports in merchandise exports(%) in t"
rename mfg_sh_x_c_ma1 mfg_sh_x_ma1
la var mfg_sh_x_ma1 "Share of manufacturing exports in merchandise exports(%) in t (MA1)"
rename mfg_sh_x_c_ma2 mfg_sh_x_ma2
la var mfg_sh_x_ma2 "Share of manufacturing exports in merchandise exports(%) in t (MA2)"

save "processed_datasets/main_dataset_meat", replace

/*******************************************************************************
Add data on manufacturing and services share in GDP
*******************************************************************************/

* Run the secondary do-file that prepares the manufacturing and services
* share-in-GDP data
do "codes/secondary_dos/manuserv_data_creator.do"

* Attach to the core dataset by country code and year.
* keep(master match) discards observations that exist only in the using file,
* consistent with the largest-city and resource-rents merges; without it the
* core dataset gains rows that have no country name, population or
* urbanization data.
* NOTE(review): confirm no downstream step relies on those using-only rows.
use "processed_datasets/main_dataset_meat", clear
merge 1:1 code_wb year using "processed_datasets/dataset_manuserv", keep(master match) nogenerate

* Discard the variants that are not used; mfgpanel and servpanel must go
* before the _c versions can take over their names below
drop un_manu_sh_gdp un_serv_sh_gdp_v1 un_serv_sh_gdp_v2 un_serv_sh_gdp_v3 mfg_gdp2010 serv_gdp2010 mfgpanel servpanel

* Manufacturing share in GDP, 2015-2020
rename manu_sh_gdp mfg_gdp
la var mfg_gdp "Share of manufacturing in GDP (%) in 2015-2020"

rename manu_sh_gdp_ma1 mfg_gdp_ma1
la var mfg_gdp_ma1 "Share of manufacturing in GDP (%) in 2015-2020 (MA1)"

rename manu_sh_gdp_ma2 mfg_gdp_ma2
la var mfg_gdp_ma2 "Share of manufacturing in GDP (%) in 2015-2020 (MA2)"

* Services share in GDP, 2015-2020
rename serv_sh_gdp serv_gdp
la var serv_gdp "Share of services in GDP (%) in 2015-2020"

rename serv_sh_gdp_ma1 serv_gdp_ma1
la var serv_gdp_ma1 "Share of services in GDP (%) in 2015-2020 (MA1)"

rename serv_sh_gdp_ma2 serv_gdp_ma2
la var serv_gdp_ma2 "Share of services in GDP (%) in 2015-2020 (MA2)"

* Manufacturing share in GDP, panel version (1960-2020)
rename manu_sh_gdp_c mfgpanel
la var mfgpanel "Share of manufacturing in GDP (%) in t (1960-2020)"

rename manu_sh_gdp_c_ma1 mfgpanel_ma1
la var mfgpanel_ma1 "Share of manufacturing in GDP (%) in t (1960-2020) (MA1)"

rename manu_sh_gdp_c_ma2 mfgpanel_ma2
la var mfgpanel_ma2 "Share of manufacturing in GDP (%) in t (1960-2020) (MA2)"

* Services share in GDP, panel version (1960-2020)
rename serv_sh_gdp_c servpanel
la var servpanel "Share of services in GDP (%) in t (1960-2020)"

rename serv_sh_gdp_c_ma1 servpanel_ma1
la var servpanel_ma1 "Share of services in GDP (%) in t (1960-2020) (MA1)"

rename serv_sh_gdp_c_ma2 servpanel_ma2
la var servpanel_ma2 "Share of services in GDP (%) in t (1960-2020) (MA2)"

save "processed_datasets/main_dataset_meat", replace

/*******************************************************************************
Labels and name changes
*******************************************************************************/

* Give the World Bank identifiers their final names in a single group rename
rename (country_wb code_wb region_wb) (country ccode region)

* Label the identifier variables
label variable country "Country name (from World Bank)"
label variable country_wup "Country name (from WUP)"
label variable ccode "Country code (from World Bank)"
label variable year "Year t"
label variable continent "Continent (N=4)"
label variable region "Region, from World Bank (N=5)"

/*******************************************************************************
Calculate Natural resources export variables
*******************************************************************************/

* Natural-resource export shares, built as sums of shares in merchandise exports.
* Stata variable labels hold at most 80 characters and longer text is truncated.
* The labels below are worded to stay within that limit so that the
* (MA1)/(MA2) suffix survives and the variants remain distinguishable.

* NRX_a: agricultural + mining exports
gen NRX_a = agri_sh_x + min_sh_x
la var NRX_a "Share of Agricultural + Mining exports in merchandise exports (%) in t"
gen NRX_a_ma1 = agri_sh_x_ma1 + min_sh_x_ma1
la var NRX_a_ma1 "Share of Agricultural + Mining exports in merchandise exports (%) in t (MA1)"
gen NRX_a_ma2 = agri_sh_x_ma2 + min_sh_x_ma2
la var NRX_a_ma2 "Share of Agricultural + Mining exports in merchandise exports (%) in t (MA2)"

* NRX_b: cashcrop+meat+hides + mining exports
gen NRX_b = cash_sh_x + min_sh_x
la var NRX_b "Share of cashcrop+meat+hides + Mining exports in merch. exports (%) in t"
gen NRX_b_ma1 = cash_sh_x_ma1 + min_sh_x_ma1
la var NRX_b_ma1 "Share of cashcrop+meat+hides + Mining exports in merch. exports (%) in t (MA1)"
gen NRX_b_ma2 = cash_sh_x_ma2 + min_sh_x_ma2
la var NRX_b_ma2 "Share of cashcrop+meat+hides + Mining exports in merch. exports (%) in t (MA2)"

* Cap each summed share at 100. Missing values sort above every number in
* Stata, so they are excluded explicitly to keep them missing.
foreach x of varlist NRX_a NRX_a_ma1 NRX_a_ma2 NRX_b NRX_b_ma1 NRX_b_ma2 {
	replace `x' = 100 if `x' > 100 & !missing(`x')
}

/*******************************************************************************
Fixing variables
********************************************************************************
We fill in some values that are missing for Taiwan
*******************************************************************************/

* Taiwan (ccode TWN): set the merchandise-exports-to-GDP share for 2010, 2015
* and 2020 in the base variable and in both MA variants.
* The values use GDP from Taiwan official statistics and merchandise export
* data from the WTO (https://data.wto.org/).
* The assignments are unconditional: any existing value is overwritten.
foreach v of varlist merch_x_gdp merch_x_gdp_ma1 merch_x_gdp_ma2 {
	replace `v' = 61.8 if ccode == "TWN" & year == 2010
	replace `v' = 53.4 if ccode == "TWN" & year == 2015
	replace `v' = 51.9 if ccode == "TWN" & year == 2020
}

* Taiwan (ccode TWN): set the manufacturing share of merchandise exports for
* 2015 and 2020 in the base variable and in both MA variants.
* Data from the WTO (https://data.wto.org/).
foreach v of varlist mfg_sh_x mfg_sh_x_ma1 mfg_sh_x_ma2 {
	replace `v' = 89.02482617 if ccode == "TWN" & year == 2015
	replace `v' = 90.91228049 if ccode == "TWN" & year == 2020
}

/*******************************************************************************
Save
*******************************************************************************/

* Write the finished dataset, including the renames, labels, NRX variables and
* Taiwan values set after the previous save
save "processed_datasets/main_dataset_meat", replace
